options(scipen=999) #so we dont have numbers in scientific
#create small functions for easier access and load libraries
h <- function(x){head(x,1)}
#to get number of unique values easier
uni <- function(x){length(unique(x))}
#to get the mode of character values
mode <- function(x){names(tail(sort(table(x)),1))}
#create categories by using two variables
categories <- function(x,y){
q <- quantile(x,c(0.3,0.6,0.9))
q2 <- quantile(y,c(0.3,0.6,0.9))
catx<-ifelse(x<=q[1],1,ifelse(x>q[1]&x<=q[2],2,ifelse(x>q[2]&x<=q[3],3,4)))
caty<-ifelse(y<=q2[1],1,ifelse(y>q2[1]&y<=q2[2],2,ifelse(y>q2[2]&y<=q2[3],3,4)))
z<-catx+caty
catf<-ifelse(z<=2,"Rookie",ifelse(z>2&z<=4,"Amateur",ifelse(z>4&z<=6,"Advanced","Elite")))
catf
}
#libraries
library(lubridate)
library(data.table)
library(dplyr)
library(readxl)
library(ggplot2)
library(plotly)
#Base Script
setwd('C:/Users/mserrano/OneDrive - IESEG/MSc/BA TOOLS OPEN SOURCE/Group Project')
#Load the information
load("DataGroupAssignment.Rdata")
#We have 3 tables and we are going to change names
demo<- data.frame(Demographics)
chips<- data.frame(PokerChipConversions)
daily<- data.frame(UserDailyAggregation)
rm(Demographics,PokerChipConversions,UserDailyAggregation)
# 1.- CLEANING DATA -----------
# DEMOGRAPHICS -----------
#first fix dates
names(demo)
## [1] "UserID" "Country" "Language" "RegDate"
## [5] "FirstPay" "FirstAct" "FirstSp" "FirstCa"
## [9] "FirstGa" "FirstPo" "ApplicationID" "Gender"
demo$RegDate<-as.Date(demo$RegDate)
for (i in c(5:10)){
demo[,i]<-as.Date(demo[,i],format="%Y%m%d")
}
#bring the names of languages and countries
countries<-read_excel("Appendices Group Assignment.xlsx", sheet = 2)
languages<-read_excel("Appendices Group Assignment.xlsx", sheet = 3)
application<-read_excel("Appendices Group Assignment.xlsx", sheet = 4)
names(countries)[2]<-"Country_Name"
names(languages)[2]<-"Language_Name"
names(application)[2]<-"Application_Name"
demo<-merge(demo,countries,by="Country")
demo<-merge(demo,languages,by="Language")
demo<-merge(demo,application,by="ApplicationID")
#drop codes
demo$ApplicationID<-NULL
demo$Language<-NULL
demo$Country<-NULL
#change gender
demo$Gender <- ifelse(demo$Gender == 1, "Male","Female")
#check dates of register and first play
summary(demo$RegDate) #all of them are within the period
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2005-02-01" "2005-02-09" "2005-02-16" "2005-02-15" "2005-02-22" "2005-02-27"
summary(demo$FirstAct) #2 NAs
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2005-02-01" "2005-02-12" "2005-02-21" "2005-03-01" "2005-02-27" "2005-10-02"
## NA's
## "2"
table(demo$FirstAct>=demo$RegDate) #all of them are >=
##
## TRUE
## 42647
demo <- subset(demo,!is.na(demo$FirstAct)) #remove 2 NAs
# USER DAILY AGGREGATION -----------
#first fix dates
daily$Date<-as.Date(daily$Date,format="%Y%m%d")
#merge product name
prod<-read_excel("Appendices Group Assignment.xlsx", sheet = 1)
names(prod)[2]<-"Product_Name"
daily<-merge(daily,prod,by="ProductID")
#bring first pay date from demographics
firstpay <- demo[,c("UserID","FirstPay")]
table(daily$UserID %in% firstpay$UserID)
##
## FALSE TRUE
## 64793 1675403
daily <- merge(daily,firstpay,by="UserID",all.x=T)
# POKERCHIPS -----------
#first fix dates in pokerchips
chips$TransDateTime<-as.POSIXct(chips$TransDateTime,format="%Y-%m-%d %H:%M:%S")
chips$TransType <- ifelse(chips$TransType == 124, "Buy","Sell")
# 2.- CREATION OF NEW VARIABLES -----------
# DEMOGRAPHICS ------
demo$N_Cats_Played <- ifelse(!is.na(demo$FirstSp),1,0) + ifelse(!is.na(demo$FirstCa),1,0) +
ifelse(!is.na(demo$FirstGa),1,0)+ifelse(!is.na(demo$FirstPo),1,0)
demo$interval_firstPay<- as.numeric(demo$FirstPay-demo$RegDate)
hist(demo$interval_firstPay)
demo$Poker <- ifelse(!is.na(demo$FirstPo),"Yes","No")
# USER DAILY AGREGGATION ------
daily <- data.table(daily)
#we are going to subset for only the dates that happened after the first pay and the ones the dont match
daily <- subset(daily, daily$Date>=daily$FirstPay)
#we are going to eliminate the transactions that dont have stakes, bets, or wins because they were inactive
daily <- subset(daily, !(daily$Stakes==0&daily$Winnings==0&daily$Bets==0))
#days of week
daily$weekday <- weekdays(daily$Date)
#write tables
#write.csv(demo,"Demographics Dep.csv",row.names=F)
#write.csv(daily,"UserDaily Dep.csv",row.names=F)
#group by user id
daily2<- daily[,.(Stakes=sum(Stakes),Winnings=sum(Winnings),Bets=sum(Bets),DaysPlayed = .N,
FirstDate = min(Date), LastDate = max(Date),
N_Products = uni(Product_Name),
Bet_Products = paste(unique(Product_Name),collapse = ", "),
CasinoBossMedia=sum(Product_Name=="Casino BossMedia"),
CasinoChartwell=sum(Product_Name=="Casino Chartwell"),
GamesBwin=sum(Product_Name=="Games bwin"),
GamesVS=sum(Product_Name=="Games VS"),
SportsBookFixed=sum(Product_Name=="Sports book fixed-odd"),
SportsBookLive=sum(Product_Name=="Sports book live-action"),
Supertoto=sum(Product_Name=="Supertoto"),
MostActiveDay=mode(weekday)),by=UserID]
daily2$balance <- daily2$Winnings-daily2$Stakes #balance per person
daily2$casino_profitability <- 1-daily2$Winnings/daily2$Stakes #how much the casino wins per person (1- client profitability)
daily2$client_profitability <- daily2$Winnings/daily2$Stakes #client profitability
daily2$meanBets <- round(daily2$Bets/daily2$DaysPlayed,0) #mean bets per day
daily2$StakesBet <- round(daily2$Stakes/daily2$Bets,2) #mean stakes per bet
daily2$WinningsBet <- round(daily2$Winnings/daily2$Bets,2) #mean wins per bet
daily2$BalanceBet <- round(daily2$balance/daily2$Bets,2) #mean balance (win or lose) per bet
daily2$Recency <- as.numeric(as.Date("2005-09-30")- daily2$LastDate) #recency is number of days since last game
daily2$since_first <- as.numeric(as.Date("2005-09-30")- daily2$FirstDate) #number of days since first game
daily2$one_time_player <- ifelse(daily2$since_first == daily2$Recency,"YES","NO") #user only played one day until now
# POKERCHIPS ------
#filter dates before pay in
chips <- merge(chips, firstpay, by="UserID",all.x = T)
table(chips$TransDateTime>=chips$FirstPay)
##
## FALSE TRUE
## 60 277809
nrow(chips) #277907
## [1] 277907
chips <- subset(chips,chips$TransDateTime>=chips$FirstPay)
summary(chips$TransDateTime) #remove dates after september 30
## Min. 1st Qu. Median
## "2005-02-01 00:51:00" "2005-03-28 20:16:00" "2005-05-23 21:43:00"
## Mean 3rd Qu. Max.
## "2005-05-30 13:18:43" "2005-07-31 14:17:00" "2005-10-03 00:07:00"
nrow(chips) #277809
## [1] 277809
chips <- subset(chips,chips$TransDateTime<=as.Date("2005-09-30"))
#create additional variables
chips$weekday <- weekdays(chips$TransDateTime)
chips$hour_of_day<-hour(chips$TransDateTime)
summary(chips$hour_of_day)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 11.00 16.00 14.78 20.00 23.00
#chips$difftime <- diffDate(chips$TransDateTime)
#difftime(chips$TransDateTime[1],chips$TransDateTime[2])
chips$time_of_day<-ifelse(chips$hour_of_day>=2&chips$hour_of_day<=8,"Early Morning",
ifelse(chips$hour_of_day>8&chips$hour_of_day<=14,"Morning",
ifelse(chips$hour_of_day>14&chips$hour_of_day<=20,"Afternoon",
"Night")))
chips <- data.table(chips)
#write.csv(chips,"PokerChips Dep.csv",row.names=F)
table_buy <- chips[TransType=="Buy"][,.(BuyAmount=sum(TransAmount),N_buys=.N),by=UserID]
table_sell <- chips[TransType=="Sell"][,.(SellAmount=sum(TransAmount),N_sells=.N),by=UserID]
#general table
table(chips$time_of_day)
##
## Afternoon Early Morning Morning Night
## 110073 22852 75784 65405
table_chips<-chips[,.(MostActiveDayPoker=mode(weekday),Early_Morning = sum(time_of_day=="Early Morning"),
Morning = sum(time_of_day=="Morning"),
Afternoon = sum(time_of_day=="Afternoon"),
Night = sum(time_of_day=="Night"),
PrefTimeOfDay = mode(time_of_day)),by=UserID]
table_chips<- merge(table_chips,table_buy,by="UserID",all.x = T, all.y = T)
table_chips<- merge(table_chips,table_sell,by="UserID",all.x = T, all.y = T)
table_chips$BuyAmount<- ifelse(is.na(table_chips$BuyAmount),0,table_chips$BuyAmount)
table_chips$SellAmount<- ifelse(is.na(table_chips$SellAmount),0,table_chips$SellAmount)
table_chips$N_buys<- ifelse(is.na(table_chips$N_buys),0,table_chips$N_buys)
table_chips$N_sells<- ifelse(is.na(table_chips$N_sells),0,table_chips$N_sells)
table_chips$BalancePoker <- table_chips$SellAmount-table_chips$BuyAmount
table_chips$MeanBuy <- ifelse(is.na(table_chips$BuyAmount/table_chips$N_buys),0,table_chips$BuyAmount/table_chips$N_buys)
table_chips$MeanSell <- ifelse(is.na(table_chips$SellAmount/table_chips$N_sells),0,table_chips$SellAmount/table_chips$N_sells)
table_chips$sell_ratio <- ifelse(is.infinite(table_chips$SellAmount/table_chips$BuyAmount),0,table_chips$SellAmount/table_chips$BuyAmount)
#create some categories to filter later
quantile(table_chips$BuyAmount,c(0.1,0.5,0.9))
## 10% 50% 90%
## 10.5000 263.7186 5027.2042
table_chips$Player_Category <- categories(table_chips$BuyAmount,table_chips$sell_ratio)
barplot(table(table_chips$Player_Category))
#this part is using a loop to get the mean time lapse (per player) between buy ins within the same game
users<-data.frame("UserID"=unique(chips$UserID))
users$mean_buy_interval <-NA
# test <-data.frame()
# for (i in 1:nrow(users)){
#
# test <- subset(chips,chips$UserID==users$UserID[i])
#
# if (sum(test$TransType=="Buy")==1|!test$TransType%in%"Buy"){next}
#
# timedif <- vector()
# for (j in 2:nrow(test)){
# timedif[j] <- difftime(test$TransDateTime[j],test$TransDateTime[j-1])
# }
#
# sells <- c(which(test$TransType=="Sell")+1,which(test$TransType=="Sell"))
# rows <-c(1:nrow(test))
# buys <- !rows%in%sells
# users$mean_buy_interval[i]<- mean(timedif[buys],na.rm = T)
#
# }
#the ones with NA are the ones that have only one buy in or only sell in their transactions
#merge
table_chips<- merge(table_chips,users,by="UserID")
summary(users$mean_buy_interval)
## Mode NA's
## logical 2361
# 3.- DATA MART -----------
dtmart <- merge(demo,daily2,by="UserID",all.x = T)
dtmart <- merge(dtmart,table_chips,by="UserID",all.x = T)
#fix a gender that is empty with male
dtmart$Gender<-ifelse(is.na(dtmart$Gender),"Male",dtmart$Gender)
#write.csv(dtmart,"Data Mart Bet and Poker.csv",row.names = F)
# 4.- GRAPHICS -----------
colors <- c('#D0CBCA', '#E5562A', '#336EF9','#151F47')
names(dtmart)
## [1] "UserID" "RegDate" "FirstPay"
## [4] "FirstAct" "FirstSp" "FirstCa"
## [7] "FirstGa" "FirstPo" "Gender"
## [10] "Country_Name" "Language_Name" "Application_Name"
## [13] "N_Cats_Played" "interval_firstPay" "Poker"
## [16] "Stakes" "Winnings" "Bets"
## [19] "DaysPlayed" "FirstDate" "LastDate"
## [22] "N_Products" "Bet_Products" "CasinoBossMedia"
## [25] "CasinoChartwell" "GamesBwin" "GamesVS"
## [28] "SportsBookFixed" "SportsBookLive" "Supertoto"
## [31] "MostActiveDay" "balance" "casino_profitability"
## [34] "client_profitability" "meanBets" "StakesBet"
## [37] "WinningsBet" "BalanceBet" "Recency"
## [40] "since_first" "one_time_player" "MostActiveDayPoker"
## [43] "Early_Morning" "Morning" "Afternoon"
## [46] "Night" "PrefTimeOfDay" "BuyAmount"
## [49] "N_buys" "SellAmount" "N_sells"
## [52] "BalancePoker" "MeanBuy" "MeanSell"
## [55] "sell_ratio" "Player_Category" "mean_buy_interval"
This project was conducted by combining data of online gambling company in R and generating insights at customer level report from data mart on dashboard in shiny. Moreover, the target customer including demographics, geographic locations, gambling habits etc. was demonstrated. In addition, the report also presented the details of company bestseller products and profitability of each product. The poker player was focused on to discover their play patterns in order to interpret the average amount of betting and winning as well as the balance. The last part was the overview of company’s revenue, profit and loss.
Let’s take a look at some of the insights of the online gambling website:
Played frequency: The average social casino gamer played 35 days in the 8 months period (from February till September) which represents a 4 times-play per month.The game players are loyal, they go to the casino, approximately once a week.
Bet frequency: On average, the clients bet around 10 times per day played. It shows website’s ability to make betting attractive for clients.
Players preferences: Gamers enjoy SportsBook categories, both fixed and alive. They play on those categories 4 times per month. These categories are preferred 66% times more over the others.
Website profitability: For each dollar that a player invests, the casino is able to capture 30 cents of it. We can assure that on average, the client loses 30% of what he invests.
Loyalty:93% of the clients of the casino are not one-time players which currently shows the power of the casino to retain its customers.
Preferred time played:The most popular day for playing in the casino is on Saturday while poker players prefer playing on Tuesdays.Moreover, poker is played mainly in the afternoon.
#Client profile
It is important to consider and understand the target audience of the online gambling website because it has a direct impact on the ability to generate revenue based on the “right” content displayed and provided such as the different language and payments options or even the future marketing campaigns.
Gender: To begin with, We can see that most of the players are men. Females only represent an 8.5% of the players.
dtmart %>% count(Gender) %>%
plot_ly(labels = ~Gender,values = ~n, type = "pie", textinfo = "label+percent",
showlegend = F)
Nationality: The bast majority of them are German, this can be due to the fact that the origin of the online website is Germany so its popularity its much higher in this country.Other nationalities that are representative in the website are Turkish, Polish, Spanish and Greek.
#reference: https://plotly.com/r/choropleth-maps/
maps <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv")
names(maps)
## [1] "COUNTRY" "GDP..BILLIONS." "CODE"
df <- dtmart %>% count(Country_Name)
names(df)[1]<-"COUNTRY"
df <- merge(df,maps,by="COUNTRY",all.x = T)
#fix some NAs
df$COUNTRY[is.na(df$CODE)]
## [1] "Diego Garcia" "FYR Macedonia"
## [3] "Holland" "Martinique"
## [5] "Moldavia" "Netherlands Antilles"
## [7] "Norfolk Island" "Pitcairn"
## [9] "Russian Federation" "Serbia and Montenegro"
## [11] "Tunesia" "United States Virgin Islands"
## [13] "USA"
df$CODE[df$COUNTRY=="USA"] <- "USA"
df$CODE[df$COUNTRY=="Russian Federation"] <- "RUS"
df$CODE[df$COUNTRY=="Moldavia"] <- "MDA"
df$CODE[df$COUNTRY=="Tunesia"] <- "TUN"
df$CODE[df$COUNTRY=="Holland"] <- "NLD"
plot_ly(df,type='choropleth', locations=df$CODE, z=df$n, text=df$Country_Name, colorscale="Greens")
Language: Knowing the players nationality can help the website to provide the correct language to display gaming information and advertisement. If most of the players are German, they should advertise themselves in German, to reach out the majority of future potential clients. Based on the graph we can see that there are 26.000 clients out of the 42.647 that speak german, representing the 61% of the clients.
dtmart %>% count(Language_Name) %>%
plot_ly(x=~Language_Name,y=~n) %>% add_bars(marker = list(color = c(colors,colors,colors,colors))) %>%
layout(xaxis = list(categoryorder = "total descending"))
Date registration: There isn’t a pattern concerning the registration date of the customers but we can observe that there was a huge peak on the 22 of February, 2633 persons registered. This could be a reaction of an advertisement campaign from the casino.
#registered by date
dtmart<-data.table(dtmart)
regs<- dtmart[,.N,by=RegDate]
plot_ly(regs, type = 'scatter', mode = 'lines', fill = 'tozeroy')%>%
add_trace(x = ~RegDate, y = ~N,marker = list(color = "rgb(21, 31, 71)"),
line = list(color ='rgb(21,31,71'),
fillcolor='rgba(50, 100, 250, 0.4)') %>%
layout(showlegend = F)
First activity vs first payment:The tendency of both the first payment and the first activity is the same. Players don’t wait to play once they have deposited their money.It could be due mainly to 2 reasons: clients premeditated before putting the money on the website so that they know how they want to spend it or they are impulsive, once they deposit the money they can’t wait to play. What we can also see is that the first time played happens mostly in march, one month after the registration. The clients don’t wait long to start playing.
plot_ly(alpha=0.4) %>% add_histogram(dtmart$FirstPay, name="First Pay", marker = list(color = "rgba(21, 31, 71,0.8)")) %>%
add_histogram(dtmart$FirstAct, name="First Activity",marker = list(color = "rgba(254, 95, 48,0.8)")) %>%
layout(barmode="stack",legend = list(orientation = 'h'))
Most of the player’s access come from the BETANDWIND.DE and BETANDWIN.COM apps with 51.7% and 34% respectively. These are the two main traffic sources for the company, marketing campaigns should be deployed mainly on these two applications to increase the engagement of existing customers and attract more potential customers. At the same time, this will help to build a large and strong community of players for the company.
dtmart %>% count(Application_Name) %>%
plot_ly(labels = ~Application_Name,values = ~n, textinfo = "label+percent",
showlegend = F) %>% add_pie(hole = 0.4)
Once we build our client’s profile, it is essential to understand their betting behavior.
The most popular category of the casino is the sports book category, both for the fixed and live.
mean_daily<- daily[,.(Users=uni(UserID),Product=h(Product_Name)),by=paste(Date,ProductID)][,.(Average=mean(Users)),by=Product]
mean_daily_p<- chips[,.(Users=uni(UserID),Product="Poker"),by=as.Date(TransDateTime)][,.(Average=mean(Users)),by=Product]
mean_daily<-rbind(mean_daily,mean_daily_p)
plot_ly(mean_daily,x = ~Product,y = ~Average) %>%
add_bars(marker = list(color = c("rgb(254, 95, 48)","rgb(21, 31, 71)")))
The number of users in the online gambling website increased significantly the first 2 months. After that it stabilized and is having a stable number of players.
users_daily<- daily[,.(Users=uni(UserID)),by=Date][order(Date)]
users_daily1<- daily[ProductID==1][,.(Users_1=uni(UserID)),by=Date][order(Date)]
users_daily2<- daily[ProductID==2][,.(Users_2=uni(UserID)),by=Date][order(Date)]
users_daily4<- daily[ProductID==4][,.(Users_4=uni(UserID)),by=Date][order(Date)]
users_daily5<- daily[ProductID==5][,.(Users_5=uni(UserID)),by=Date][order(Date)]
users_daily6<- daily[ProductID==6][,.(Users_6=uni(UserID)),by=Date][order(Date)]
users_daily7<- daily[ProductID==7][,.(Users_7=uni(UserID)),by=Date][order(Date)]
users_daily8<- daily[ProductID==8][,.(Users_8=uni(UserID)),by=Date][order(Date)]
users_daily_p<- chips[,.(Poker_Users=uni(UserID)),by=as.Date(TransDateTime)][order(as.Date)]
names(users_daily_p)[1] <-"Date"
users_daily<-merge(users_daily,users_daily1,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily2,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily4,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily5,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily6,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily7,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily8,by="Date",all.x=T,all.y=T)
users_daily<-merge(users_daily,users_daily_p,by="Date",all.x=T,all.y=T)
plot_ly(users_daily, type = 'scatter', mode = 'lines', fill = 'tozeroy')%>%
add_trace(x = ~Date, y = ~Users,line = list(color ='rgb(21, 31, 71)'),
fillcolor='rgba(50, 100, 250, 0.4)') %>%
layout(showlegend = F)
byprod<- dtmart[,.(CasinoBossMedia=sum(CasinoBossMedia),CasinoChartwell=sum(CasinoChartwell),
GamesBwin=sum(GamesBwin),GamesVS=sum(GamesVS),SportsBookFixed=sum(SportsBookFixed),
SportsBookLive=sum(SportsBookLive),Supertoto=sum(Supertoto)),by=MostActiveDay][!is.na(MostActiveDay)]
plot_ly(byprod, x=~MostActiveDay,y=~SportsBookFixed, name="Sports Book Fixed", type="bar") %>%
add_trace(y=~SportsBookLive, name="Sports Book Live") %>%
add_trace(y=~GamesBwin, name="Games Bwin") %>%
add_trace(y=~GamesVS, name="Games VS" ) %>%
add_trace(y=~CasinoBossMedia, name="Casino Boss Media" ) %>%
add_trace(y=~CasinoChartwell, name="Casino Chartwell" ) %>%
add_trace(y=~Supertoto, name= "Supertoto" ) %>%
layout(yaxis = list(title = 'Count'),xaxis = list(categoryorder = "total descending"), barmode = 'stack')
Users by product and date: Sports books fixed are the all-day bestsellers, followed by sports books live. Saturday, Sunday, and Wednesday have the highest number of players, respectively, while Monday is the day with the lowest amount of players.
Moreover, most of the players are regular players corresponding to 39,266k players while one-time players are the minority.
dtmart %>% count(one_time_player) %>%
plot_ly(x = ~one_time_player,y = ~n) %>% add_bars(marker = list(color = c("rgb(254, 95, 48)","rgb(21, 31, 71)")))
The average return of all products is 28.84% and 13.84 over 15% profit. As mentioned before, for each dollar that a player invests, the casino is able to capture 30 cents of it. We can assure that on average the client loses 30% of what he invests, so this is the profitability of the online gambling website.
plot_ly(
domain = list(x = c(0, 100), y = c(0, 100)),
value = mean(dtmart$casino_profitability,na.rm=T)*100,
title = list(text = "Profitability"), type = "indicator",
mode = "gauge+number+delta", number = list(suffix = "%"),
gauge = list(bar = list(color = "rgb(21,31,71)"),
bordercolor = "gray"),
delta = list(reference = 15, increasing = list(color = "darkgreen"),decreasing=list(color = "darkred"))) %>%
layout(margin = list(l=20,r=30))
The most active bet days are mainly on Wednesdays and on the weekends. Of which Wednesday accounted for 24.6% while Saturday and Sunday accounted for 23.3% and 19.7% respectively. Promotions should be focused on these days of the week to capture the attention of potential customers and increase the bets of existing customers.
dtmart %>% count(MostActiveDay) %>%
plot_ly(labels = ~MostActiveDay,values = ~n, type = "pie", textinfo = "label+percent",
marker = list(color = c("#151F47", "#AA3344")),
showlegend = F)
Recency bets show that most of the players last bet happened the last 35 to 50 days. This is mainly due to the fact that players are active. Contrarily, we can easily see that a significant amount of bets happened for the last time 200 days ago which suggests that there are some inactive players. The online website can do a re-activation campaign to make these clients bet again.
plot_ly(alpha=0.4) %>% add_histogram(dtmart$Recency, marker = list(color = "rgba(27, 63, 10,0.8)")) %>%
layout(barmode="stack",legend = list(orientation = 'v'))
The last step of the marketing analysis should be to focus on the poker games of the online gambling website.
We can the number of daily poker users:
plot_ly(users_daily, type = 'scatter', mode = 'lines', fill = 'tozeroy')%>%
add_trace(x = ~Date, y = ~Poker_Users,line = list(color ='rgb(254, 95, 48)'),
fillcolor='rgba(246, 143, 112, 0.4)') %>%
layout(showlegend = F)
Regarding the poker balance, which is calculated from the amount of stakes and bets, we can conclude that it tends to be negative most of the time. This shows that players lose more than what they gain when they bet on poker regardless the time of the day or the weekday. Balance poker has a large fluctuation in the afternoon in all days of the week, especially Sunday where the highest negative peak is reached. On Friday afternoon, balance poker reaches its highest positive number which means that is when poker players make most of the money.
plot_ly(data = dtmart[MeanBuy<quantile(MeanBuy,0.99,na.rm=T)],
x = ~MostActiveDayPoker, y = ~round(BalancePoker,2),
color = ~PrefTimeOfDay, colors=colors) %>%
layout(yaxis=list(title="Balance"),xaxis=list(title=""),legend = list(orientation="h"))
Players tend to buy the most number of chips at early morning, followed by night while afternoon is the least. It can be said that the average buy poker of early morning players is more consistent compared to other times of day. Especially in the morning has some outliers, what this suggests is that during the period being considered, many players tend to buy poker many times more than the normal pattern.
plot_ly(dtmart[MeanBuy<quantile(MeanBuy,0.98,na.rm=T)], y=~MeanBuy, color= ~PrefTimeOfDay, type="box") %>%
layout(showlegend = F)
Average sell of poker in the early morning has the highest value and is more consistent than at other times of the day, followed by the afternoon. However, afternoon tends to be the most inconsistent as there are many outlier values, indicating more influence leading to selling than the normal pattern. While afternoon has the second highest average sell of poker value, the average buy of poker value is the lowest compared to other times of the day.
plot_ly(dtmart[MeanSell<quantile(MeanSell,0.98,na.rm=T)], y=~MeanSell, color= ~PrefTimeOfDay, type="box") %>%
layout(showlegend=F)
The majority of poker players are in the advanced and amateur categories with 38.2% and 36.6% respectively. This shows that the level of the player does not affect the propensity to choose to play poker.
dtmart[Poker=="Yes"] %>% count(Player_Category) %>%
plot_ly(labels = ~Player_Category,values = ~n, type = "pie", textinfo = "label+percent",
showlegend = F)
chipsb<-chips[TransType=="Buy"][,.(Buys=mean(TransAmount),time_of_day=h(time_of_day)),by=hour_of_day]
chipss<-chips[TransType=="Sell"][,.(Sells=mean(TransAmount)),by=hour_of_day]
chipss2<-merge(chipsb,chipss,by="hour_of_day")
plot_ly(chipss2, x=~Sells, y=~Buys, color=~time_of_day, size=~Buys, marker = list(opacity = 0.8),
sizes=c(10,400))
In this plot we see that most of the higher buy ins and sells are made in the night and early morning.